* Weights are all treated as aweights (analytical weights) but for robust se (and clustered), each variance estimate
* is weighted accordingly, not each estimated standard deviation; i.e. SSR=sum(w*(r^2)) and not SSR=sum((w*r)^2) 
* Variance matrix always adjusts for small sample bias (VCV=vcv*n/dof) as with ivreg2 with 'small' option
* Last revised 7/13/16

program lsfereg , sortpreserve eclass byable(recall)

version 10
syntax varlist(min=1 numeric ts) [if/] [in/] [aw fw pw iw/] , ABSorb(varlist min=1) [ NOWITHin Robust CLUSTER(varname) PREDict FULLvcv]
marksample touse , strok
markout `touse' `varlist' `absorb' `cluster', strok
* Reduce sample according to `if' and `in'
if "`if'"!="" qui: replace `touse' = `touse' & (`if')
if "`in'"!="" {
	tempvar aux
	qui: gen byte `aux' = `touse' in `in'
	qui: replace `touse' = 0 if `aux'==.
	drop `aux'
}

* Get the weight variables and add the tmp var to markout missing obs
if "`exp'"!="" {
	tempvar w
	qui: gen `w' = 0 + `exp' if `touse'
	markout `touse' `w'
	local wght 1
	qui: summ `w'
	if r(min)<0 {
		di as err "Weights may not be negative"
		error 402 
	}
}
else local wght 0
	
* Get cluster variable if cluster std errors required
if "`cluster'"!="" {
	tempvar gclus
	if "`robust'"!=""  {
		di in red "Cannot have clustered and robust at the same time"
		error 184
	}
	markout `touse' `cluster' , strok
	qui: egen `gclus' = group(`cluster') if `touse'
	qui: summ `gclus'
	local clus_grps = r(max)
	local clus 1
}
else local clus 0
	
* Get options coded in
if "`nowithin'"!="" local wit 0
else local wit 1
if "`robust'"!="" local rob 1
else local rob 0
if "`predict'"!="" local pred 1
else local pred 0
if "`fullvcv'"!="" local fvcv 1
else local fvcv 0
   
* Return error if no observations
summ `touse' , meanonly
local N = `r(N)'
if !`N' {
  di in red "No observations"
  error 2000 
}

	
* Build categorical variables
local xfe		// Used to store the categorical variable used in estimation
local fename	// Used to store name of the relevant variable
local n_max = 1 
foreach g in `absorb' {
  * Create group variable
  tempvar v`g'
  qui: egen `v`g'' = group(`g') if `touse'
  * Throw error if it has N groups
  qui: summ `v`g'' if `touse'
  local ng = `r(max)'
  if `ng'==`r(N)' {
    di in red "No variation after conditioning on Fixed Effects"
    error 2000
  }
  * Set first among fe vars if is has the most groups so far, and last otherwise
  if `ng'>`n_max' {
    local xfe "`v`g'' `xfe'"
    local fename "`g' `fename'"
    local n_max = `ng'
  }
  else {
    if `ng'>1 {
      local xfe "`xfe' `v`g'' "
      local fename "`fename' `g'"
    } 
  }
}

* Check that some fixed effect vars are still used
if "`fename'"=="" {
  di in red "No variation in fixed effect groups. Use the regression command and omit fixed effects"
  error 409
}

* Check if pred and by not called together, and preallocate vars
if `pred' {
  if _by() {
    di in red "Cannot use 'by' with 'predict'"
    error 190
  }
  foreach x in y xb `fename' {
    capture confirm variable `x'_hat
    if !_rc  {
      di in red "`x'_hat already defined"
      error  110
    }
    else qui: gen `x'_hat = .
  }
}
	   
* Run Regression
mata: ols("`varlist'","`xfe'","`touse'","`fename'","`gclus'","`w'",`wit',`rob',`clus',`wght',`pred',`fvcv')

* Get macros to display results
local depvar : word 1 of `varlist'
if (`wit') gettoken f1 fother : fename
else {
  local f1 "N/A"
  local fother "`fename'"
}
if (`wght') local wvar "`exp'"
else local wvar "N/A"
if (`clus') local cvar "`cluster'"
else local cvar "N/A"
if (`clus') local cgrps "`clus_grps'"
else local cgrps "0"


* Display results
display _newline
display as txt _column(18) "LINEAR REGRESSION WITH N-WAY FIXED EFFECTS"  _newline
display as txt "Number of Obs" _column(20) as txt "=" _column(23) as result e(N) _continue
display as txt _column(40) "Degrees of Freedom" _column(70) as txt "=" _column(73) as result e(df_r) 
display as txt "Within Categories" _column(20) as txt "=" _column(23) as result e(N_g1) _continue
display as txt _column(40) "Dummies from F.E." _column(70) as txt "=" _column(73) as result e(N_g2)
display as txt "Overall R2" _column(20) as txt "=" _column(23) as result %6.4f = e(r2)  _continue
display as txt _column(40) "Adj R2" _column(70) as txt "=" _column(73) as result %6.4f = e(r2_a) 
display as txt "Within R2" _column(20) as txt "=" _column(23) as result %6.4f = e(r2w)  _continue
display as txt _column(40) "Adj Within R2" _column(70) as txt "=" _column(73) as result %6.4f = e(r2_aw) 
display as txt "De-meaned variable: " as result "`f1'" _continue
display as txt _column(40) "Weights Used: " as result "`wvar'"
display as txt "Variables for dummies: " as result "`fother'" _continue
display as txt _column(40) "Clustering Variable: " as result "`cvar'" as txt " (" as result "`cgrps'" as txt " clusters)"
display as txt "Dependent Variable: " as result "`depvar'" 
if `rob'==1 ereturn local vcetype "Robust"
if `clus'==1 ereturn local vcetype "Clustered"
ereturn display
	
ereturn local cmd "lsfereg"

end
	

version 10
mata:
void ols(string scalar vxy, string scalar vfe, string scalar touse, string scalar fename, ///
         string scalar gclus, string scalar w,real scalar wit, real scalar rob, real scalar clus,  ///
	      real scalar wght, real scalar pred, real scalar fvcv)
	             
	          
{	
// vxy(names of dep and exo var), vfe(names of tmp fe grps), fename (true names of grps of fe)
// gclus(name of tmp cluster grp), w(name of tmp weight var)
// Indicators: wit(within), rob(robust), pred(predict), clus(cluster), wght(weights), fvcv(full vcv matrix)

xy  =tokens(vxy)		// y + exogenous covariates
FE  =tokens(vfe)
nfe =cols(FE)
FEN =tokens(fename)
nxy =cols(xy)  		// Number of indep vars + y 

// Get data, order: Z,Y,Xn,Xx,1,FE
st_view(XY=.,.,(xy),touse)
n = rows(XY)
BX = XY,J(n,1,1)		// Big X: Y,X,1,FE

// Build FE
FENAME = "" , "_cons"       // Build array of FE names to use in the VCV matrix
F=J(n,0,0)
for (i=1+wit;i<=nfe;i++) {
  st_view(G=.,.,FE[i],touse) ; dm(BX,G,m=.) ; 
  FENAME = FENAME \ ( J(m,1,""),( J(m,1,FEN[i]+"_")+strofreal(2::(m+1)) ) )
}
K=cols(BX)-1;
if (pred) YX=BX; 

	
// Get Weights (normalized to add up to N) (W needed in VCV even though no wght specified)
if (wght) {
	st_view(W=.,.,w,touse) ; W=(W/sum(W))*n     
}
else W=J(n,1,1)	// Weights used for Robust VCV matrix

// Get ssr for y alone (before demeaning)
t=XY[.,1] :- mean(XY[.,1]:*W); SSR_Y1=quadcross(t,W,t); t=.
	
// Demean if within regression
ng2=0
if (wit) {
  printf("Demeaning..."); displayflush();
  st_view(f=.,.,FE[1],touse); 
  // Mean over all the data (1-k)
  if (wght) t2=mean(BX:*W); else t2=mean(BX);  
  f = f,(1..n)';  _sort(f,1); a=1;  
  for (i=2;i<=n;i++) {
    if (f[i,1]==f[i-1,1]) continue
    v=f[a..(i-1),2]; if (wght) t3=mean( BX[v,.]:* (W[v]/quadsum(W[v])*length(v)) ); else t3=mean(BX[v,.]) 
    BX[v,.] = BX[v,.] :- t3 :+t2 ; a=i;  ng2++
  }
  // Do the last group
  v=f[a..n,2]; ng2++; 
  if (wght) t3=mean( BX[v,.]:* (W[v]/quadsum(W[v])*length(v)) ); else t3=mean(BX[v,.]) ; BX[v,.]=BX[v,.] :- t3 :+t2 ;
  t=.; t2=.; t3=.; f=.; v=.; a=.;   // Clean up
}
	
// Build Big Matrix (BM): Y(1),X(nxy),FE(?)
printf("running regression..."); displayflush();
if (wght) BM=quadcross(BX,W,BX) ; else BM=quadcross(BX,BX)
y=1 ; x=2..cols(BM)        // X'X = BM[x,x] = (X,XFE)' * (X,XFE)

// Get ssr for y alone (after demeaning)
SSR_Y2= BM[y,y] - (BM[y,nxy+1]^2)/n    // Y'Y - n*(Y'1)^2 (apropriately weighted)

// Rescale BM so that it is stable, multiplying on both sides by the stdev of each variable
// Rescaling after SSR, since res below is calc with BX, not BM
Scale=1:/sqrt(diagonal(BM)); BM=quadcross(diag(Scale),quadcross(BM,diag(Scale))); _makesymmetric(BM);
	
// Make first inversion, and remove collinear vectors
inv(ixx=BM[x,x]); t=diagonal(ixx):!=0; coll(t,m=.); x=x[m]; ixx=ixx[m,m];
		
// Get estimates
b1=quadcross(ixx,BM[x,y] ) // (X'X)^-1 X'Y
b1=quadcross(diag(Scale[x]),b1/Scale[y])
res=quadcross(BX[.,(y,x)]' ,(1\-b1))  // Y - X*b_hat
s2=res:^2

// Clean up (save mem)
BM=.
	
// Calc b and VCV for the full set of variables
k=rows(b1) ;  b=J(K,1,0) ; bx=x:-1 ; b[bx]=b1  // Shift x to account for Y
SSR = quadcross(res,W,res)
dof = n-k-ng2+1 ; adj = n/dof                  // +1 dof is due to one ng2 not being used

// Resize ixx appropriately (multiple because of rescaleing inverse):
s=diag(Scale[x]);	ixx=quadcross(s,quadcross(ixx,s)); s=.; _makesymmetric(ixx);
if (!rob & !clus) vcv = adj*SSR/n*ixx  
else {
	// Build Robust VCV (Wooldrige, pg. 57 -> VCV = n*adjust * (X'X)^-1 * (X'*W*X) * (X'*X)^-1
	// Adjust dof for obs really used: those with s2>0; as 0 is not exact, get approx 0 
	// (and err on the side of including too many obs). VCV=vcv*n2/(n2-k2)=vcv*n2/(n-k)
	printf("building VCV..."); displayflush();
	BX=BX[.,x]  // Keep only X (which is still huge -incldues some FE-)
	if (rob)	xwx = quadcross(BX,s2:*W,BX)
	if (clus) {
		st_view(t,.,gclus,touse); t=t,(1..n)';  _sort(t,1); a=1;  
		s=res:*sqrt(W); xwx=J(cols(BX),cols(BX),0); ngc=0
		for (i=2;i<=n;i++) {
			if (t[i,1]==t[i-1,1]) continue
			v=t[a..(i-1),2]; a=i ; ngc++ ; xt=cross(BX[v,.],s[v])'; xwx=xwx+quadcross(xt,xt)
	    }
		v=t[a..n,2]; ngc++ ; xt=cross(BX[v,.],s[v])'; xwx=xwx+quadcross(xt,xt)
		_makesymmetric(xwx)
		adj=adj*ngc/(ngc-1)*(n-1)/n ; s=.;t=.;xt=.;v=.;a=.;
	}
	vcv = adj*makesymmetric(quadcross(ixx,quadcross(xwx,ixx)))
}
BX=.

// Put vcv into the full vcv (including the zeros)
VCV=J(K,K,0) ;	VCV[bx,bx]=vcv ;
R2 =1 - SSR / SSR_Y1 ; R2w =1 - SSR / SSR_Y2 ; 
R2a=1-(1-R2)*(n-1)/(n-k-ng2)         // Adj on R2a is not 'adj' because of the +1 in dof
R2aw=1-(1-R2w)*(n-1)/(n-k) 

// Predict variables
if (pred) {
	printf("predicting..."); displayflush();
	y_hat = cross(YX[.,x]',b1)		   // YX includes Y, Add the within FE below
	g_hat = J(n,nfe,0) ; m1=nxy+1	   // Jump y and constant
	for (i=1+wit;i<=nfe;i++) {
		st_view(G=.,.,FE[i],touse)
		fx=max(G)-1							// Omitted first grou
		if (fx==0) continue				// Continue if only 1 grp (constant wiped it out)
		t=m1..(m1+fx-1) ; m1=m1+fx		  	  
		g_hat[.,i] = cross(YX[.,t:+1]' , b[t] )   // Jump over y for XY
	}
	xb_hat = y_hat - rowsum(g_hat)
	if (wit) {
		st_view(t=.,.,FE[1],touse); t=t,(1..n)';  _sort(t,1); a=1;  
		s=YX[.,1]-y_hat;  
		for (i=2; i<=n; i++) {
			if (t[i,1]==t[i-1,1]) continue
			v=t[a..(i-1),2]; a=i; 
			if (wght) r=mean( s[v]:*(W[v]/quadsum(W[v])*length(v)) ); else r=mean(s[v]) 
			g_hat[v,1]=J(length(v),1,r)
		}
		v=a..n; if (wght) r=mean( s[v]:*(W[v]/quadsum(W[v])*length(v)) ); else r=mean(s[v]) 
		g_hat[v,1]=J(length(v),1,r)
		y_hat=y_hat + g_hat[.,1] ; t=.; a=.; v=.; s=.; r=.;
	}
	t=tokens(fename) + J(1,nfe,"_hat")
	st_store(.,("y_hat","xb_hat",t),touse,(y_hat,xb_hat,g_hat))
}
	
	
// Send results to Stata
printf("printing... \n"); displayflush();	
st_eclear()
if (nxy>1) NAME = J(nxy-1,1,"") , (xy[2..cols(xy)] )'  // Constant in FENAME
else NAME = J(0,2,"") 
NAME = NAME \ FENAME
// Keep only a part of the estimates
if (!fvcv) t=nxy ; else t=rows(VCV)
st_matrix("b",b[1..t]') ; st_matrix("V",VCV[1..t,1..t]) ; NAME=NAME[1..t,.]

st_matrixcolstripe("b",NAME) ; st_matrixrowstripe("V",NAME) ; st_matrixcolstripe("V",NAME)
stata("ereturn post b V, esample(`touse') depname(`dep_var')")
// Save Constants:
st_numscalar("e(N)",n) ; st_numscalar("e(N_g1)",ng2)	
// Count non-zero FE of those added as columns
if (wit & nfe==1) st_numscalar("e(N_g2)",0) ; else st_numscalar("e(N_g2)",k-sum(b[1..nxy]:!=0)) 
st_numscalar("e(df_r)",dof) ; 
st_numscalar("e(r2)",R2);   st_numscalar("e(r2_a)",R2a)
st_numscalar("e(r2w)",R2w); st_numscalar("e(r2_aw)",R2aw)
	
// Clean up directories:
unlink("tmp.txt");

return

}	

void coll(real matrix t,real matrix b)
{
// Returns a list with the indices of the non-zero elements of t
s=min((1::length(t)):/t); b=s; 
for (i=s+1;i<=length(t);i++)  if (t[i]) b=b,i 
}  

void inv(real matrix A)
{
// Returns the inverse of A through Cholesky decomposition. 
// See the matlab file inverse.m for more details
n=rows(A); d=J(n,1,.); L=I(n); t=I(n);
  
// Lower triangular matrix
for (i=1;i<=n;i++) {
	// Diagonal elements
	k=1::(i-1);  
	if (i==1) d[i]=A[i,i]; else d[i]=A[i,i]-quadcross(L[i,k]',t[i,k]') 
	if ( abs(d[i])<epsilon(i^3) ) {
		d[i]=0; L[i,.]=J(1,n,0); t[i,.]=J(1,n,0); continue 
	}
	// Off-diagonal
	if (i==n) continue
	v=(i+1)::n; 
	if (i==1) t[v,i]=A[v,i]; else t[v,i]=A[v,i]-quadcross(L[v,k]',t[i,k]');
	L[v,i]=t[v,i]:/d[i];
}
t=.;

// Inverse of lower triangle
iL=I(n); 
for (i=1;i<=n;i++) {
	for (j=i+1;j<=n;j++) {
		v=1::j-1; iL[j,i]=-quadcross(L[j,v]',iL[v,i]); 
	}
}
  
A=quadcross(iL,1:/d,iL);

}

void dm(real matrix X,real matrix G,real scalar m)
{
// Builds dummies for categorical variable G and annexes it to X
// Stores in m the max number of dummies created.
// Always omits the first category - vars should always have more than 1 category
n=rows(G); k=cols(X); m=0; t=G,(1..n)'; _sort(t,1);
// For each category, get the starting and ending indices
v=J(0,2,.);   
for (i=2;i<=n;i++) {
  if (t[i,1]!=t[i-1,1]) {
    if (!m) { 
      // Omit first group
      m++; x1=i; continue; 
    }
    v=v \ x1,i-1; x1=i; m++; 
  }
}
v=v \ x1,n;   // Last group
// Create the matrix of zeros, append to X, and then start writing ones 
X=X,J(n,m,0) ; 
for (i=1;i<=m;i++) {
  X[t[v[i,1]::v[i,2],2] , (k+i)]=J(v[i,2]-v[i,1]+1,1,1);
}

}    

end
	